################################################################################
# Format Data for: "Why Join? How Civil Society Organizations' Attributes 
# Signal Congruence and Impact Community Engagement"
# author: Simon Hoellerbauer
# date created: September 10, 2021
# date last edited: September 10, 2021
################################################################################

#packages
#package version used for final paper analysis noted here
library(tidyverse) #for easier data manipulation; version 1.3.1
library(labelled) #for dealing with labeled data; version 2.8.0

# R Version for final paper analysis: 4.1.1
# RStudio Version for final paper analysis: 1.4.1717

# Working directory ----
## Before running, the user must set wd_path to the folder on their computer
## that contains vendor_long.RData; org_level.RData is written to the same
## folder at the end of this script.
wd_path <- "must/change/to/data/files/location"
setwd(dir = wd_path)

# Input data ----
# Loads the objects stored in vendor_long.RData into the workspace; the rest of
# the script works on the object named vendor_long.
# Note that vendor_long.RData is a subset of the variables and observations
#  of the larger LGAP/TAD endline survey described in Appendix A.
# It was lightly cleaned for the analysis of that survey before being used here.
load(file = "vendor_long.RData")

# The meeting and scandal outcomes are recorded once per pair. Copy each of
# them under an "a" and a "b" name so that, like the other pair<N><a|b>_*
# columns, they are picked up by the reshape below and end up on both
# organization rows of the pair.
vendor_long <- vendor_long %>%
  mutate(
    pair1a_meeting = pair1_meeting,
    pair1b_meeting = pair1_meeting,
    pair1a_scandal = pair1_scandal,
    pair1b_scandal = pair1_scandal,
    pair2a_meeting = pair2_meeting,
    pair2b_meeting = pair2_meeting,
    pair2a_scandal = pair2_scandal,
    pair2b_scandal = pair2_scandal
  )

# drop the pair-level originals now that the a/b copies exist
vendor_long[c("pair1_meeting", "pair1_scandal",
              "pair2_meeting", "pair2_scandal")] <- NULL

# Build org_level in one pass:
#  1. stack every column outside the resp_id:member_in range into key/value
#  2. split the key into the pair id (pair1a, pair1b, pair2a, pair2b) and the
#     question name that follows the underscore
#  3. spread the questions back out into one column each
#  4. give the attribute columns descriptive names
org_level <- vendor_long %>%
  gather(key, value, -c(resp_id:member_in)) %>%
  extract(key, c("pair", "question"), "(pair[12][ab])_(.*)") %>%
  spread(question, value) %>%
  dplyr::rename(
    capital = att1_value,
    leader_frmr_prof = att2_value,
    funding = att3_value,
    party = att4_value
  )

# add labels to attributes, then convert to factors
## function that will help us do this all at once
#attaches value labels to the columns named in vars and converts each of those
#columns to a factor
#  data:   data frame containing the columns named in vars
#  vars:   character vector of column names to convert
#  labels: list of named vectors; labels[[i]] holds the value labels that are
#          applied to the column vars[i]
#returns data with every column in vars replaced by its factor version; all
#  other columns are left untouched
labelled.to_factor <- function(data, vars, labels){
  #catch a missing label set up front instead of failing midway through
  if(length(labels) < length(vars)){
    stop("labels must supply one set of value labels per entry of vars",
         call. = FALSE)
  }

  #seq_along() rather than 1:k, so an empty vars vector returns data unchanged
  for(i in seq_along(vars)){
    #use.names = FALSE: unlist() would otherwise name every element after the
    #column ("capital1", "capital2", ...) and the factor would carry those
    #names along
    values <- unlist(data[vars[i]], use.names = FALSE)
    data[vars[i]] <- to_factor(labelled(values, labels[[i]]))
  }

  data
}

#attribute columns that are converted from coded values into factors
vars <- c("capital", "leader_frmr_prof", "funding", "party")

#value labels for each attribute, listed in the same order as vars; the codes
#are the character strings "1", "2", ... and each code is named by the label
#it stands for
label_list <- list(
  setNames(as.character(1:4),
           c("Western_Donor", "South_Africa", "Lilongwe", "District_Capital")),
  setNames(as.character(1:6),
           c("vendor", "carpenter", "laborer",
             "business_owner", "bureaucrat", "politician")),
  setNames(as.character(1:5),
           c("Western_gov", "Chinese_gov", "South_African_gov",
             "Malawian_gov", "citizen_contr")),
  setNames(as.character(1:2),
           c("connected_to", "indep_of"))
)

#turning attributes into factors, facilitates regression
org_level <- labelled.to_factor(data = org_level,
                                vars = vars,
                                labels = label_list)

##HISTORICAL NOTE: THE COMMENTED-OUT CODE BELOW WAS USED BEFORE THE RELEVELING
##FURTHER DOWN MADE THE MOST LOCAL CATEGORY THE BASE
# relevel leader_frmr_prof in order to make more foreign prof base category
#org_level$leader_frmr_prof <- factor(org_level$leader_frmr_prof,
#                                     c("politician", "bureaucrat",
#                                       "business_owner", "laborer",
#                                       "carpenter", "vendor"))

#drop the element names from the four attribute factors
org_level <- mutate(org_level,
                    across(c(capital, leader_frmr_prof, funding, party),
                           unname))


#reverse the level order of capital, funding, and party so that the most
#local category comes first and therefore serves as the base level
org_level$capital <- factor(org_level$capital,
                            levels = levels(org_level$capital)[4:1])
org_level$funding <- factor(org_level$funding,
                            levels = levels(org_level$funding)[5:1])
org_level$party <- factor(org_level$party,
                          levels = levels(org_level$party)[2:1])

#turning meeting and scandal into 1 and 0 vectors, for analysis:
#have to identify whether organization picked for meeting or scandal
#matches last letter of pair id

#write function that does this for us
#for every (pair id column, answer column) combination in vars, checks row by
#row whether the last character of the pair id equals the lower-cased last
#character of the answer
#  data: data frame holding the columns named in vars
#  vars: list of length-2 character vectors, each of the form
#        c(<pair id column name>, <answer column name>)
#returns a logical matrix with nrow(data) rows and one column per element of
#  vars; an entry is NA when either of the two compared values is NA
find.pair.match <- function(data, vars){

  tf_matches <- matrix(NA, nrow = nrow(data), ncol = length(vars))

  #seq_along() rather than 1:length(), so an empty vars list is a no-op
  for(i in seq_along(vars)){
    pair_ids <- unlist(data[vars[[i]][1]], use.names = FALSE)
    answers <- unlist(data[vars[[i]][2]], use.names = FALSE)

    #str_sub() and tolower() are vectorized, so the whole column is compared
    #in one step; this also handles data with zero rows
    tf_matches[, i] <- str_sub(pair_ids, -1) == tolower(str_sub(answers, -1))
  }

  tf_matches
}

#compare the pair id with the meeting answer (column 1 of the result) and
#with the scandal answer (column 2 of the result)
tf_matches <- find.pair.match(
  org_level,
  vars = list(c("pair", "meeting"), c("pair", "scandal"))
)

#store the matches as integer indicators; scandal_ny is the reverse coding of
#scandal_yn, so that it matches up with meeting in substantive terms
org_level <- org_level %>%
  mutate(
    meeting_yn = as.integer(tf_matches[, 1]),
    scandal_yn = as.integer(tf_matches[, 2]),
    scandal_ny = 1 - scandal_yn
  )

#sort rows by respondent, then derive the remaining analysis variables
org_level <- org_level %>%
  arrange(resp_id) %>%
  mutate(
    #enum as a factor variable
    enum = factor(enum),
    #1 for rows belonging to pair1, 2 for rows belonging to pair2
    pair_num = as.numeric(grepl("pair2", pair)) + 1,
    #respondent-by-pair identifier, for CausalANOVA
    pair_id = paste(resp_id, pair_num, sep = "_"),
    #factor version of apathy_in, for cregg ("Disagree" is the first level)
    apathy_fac = factor(ifelse(apathy_in, "Agree", "Disagree"),
                        levels = c("Disagree", "Agree")),
    #factor version of pair_num, for cregg
    pair_fac = as.factor(pair_num)
  )

#write the finished data set to the working directory as an RData object, to
#make analysis easier
save(org_level, file = "org_level.RData")
